R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

library(readxl)
Electricity_Data_US <- read_excel('electricity_data_US.xls', sheet='electricity_data_US')
Electricity_Data_US_ts <- ts(Electricity_Data_US$'US_ALL_SECTORS (thousand megawatts hours)', frequency = 12, start = c(2001,1), names = c("Date", "US_ALL_SECTORS"))
Electricity_Data_US_ts
##         Jan    Feb    Mar    Apr    May    Jun    Jul    Aug    Sep    Oct
## 2001 332493 282940 300707 278079 300492 327694 357614 370533 306929 294734
## 2002 319941 281826 302549 289848 307675 341023 381542 374586 331279 307059
## 2003 341989 299249 304317 285756 307545 328694 374396 381816 323136 306741
## 2004 346546 314280 308812 290560 327380 345085 377332 368439 335622 312450
## 2005 343121 298500 317458 289562 315062 363672 402274 404941 350218 316398
## 2006 328658 307333 318730 297858 330616 364260 410421 407763 332055 321567
## 2007 353531 323230 320471 303129 330203 362755 393226 421797 355394 332615
## 2008 362998 325106 324630 305865 325245 373109 402900 388987 338056 318547
## 2009 354993 300887 310603 289537 311306 347658 372542 381221 327401 307040
## 2010 360957 319735 312168 287800 327936 375759 409725 408884 346045 307921
## 2011 362872 313127 318710 302401 323628 367727 418693 406511 337931 308699
## 2012 339526 309389 309090 295229 336516 360825 414641 395700 334586 311652
## 2013 348967 309728 325399 299333 322156 356823 394846 385286 340941 314925
## 2014 377250 324344 331818 297625 324718 357838 385773 384335 339881 314516
## 2015 360449 334471 324186 294128 322081 362403 400412 392110 350115 312106
## 2016 352714 313680 304384 292889 316779 367776 411881 409695 351479 312940
## 2017 344325 291043 319329 295354 323439 358514 404425 384731 335908 318662
## 2018 373230 306894 321547 300756 338948 371886 411290 408028 356258 324932
## 2019 359509 315026 326657 296663 330423 352988 410038 401430 360518 320352
## 2020 341850 319550 309587 279583 304593 351745 409562 398280 333258 313531
## 2021 350796 326223 312285 292504 318859 373754 404749 413353 348201 319638
## 2022 378967 327767 325952                                                 
##         Nov    Dec
## 2001 278934 305496
## 2002 296290 324834
## 2003 297867 331680
## 2004 302101 341948
## 2005 306115 348101
## 2006 309159 336283
## 2007 314103 346290
## 2008 310046 343898
## 2009 296635 350507
## 2010 306010 362119
## 2011 304102 335740
## 2012 305976 334635
## 2013 314540 353021
## 2014 317489 337951
## 2015 300647 324421
## 2016 297056 345337
## 2017 308045 350408
## 2018 322369 342139
## 2019 315849 338402
## 2020 301250 344346
## 2021 315495 339684
## 2022

Including Plots

Load the Necesssary Libraries

library(fpp)
## Loading required package: forecast
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## Loading required package: fma
## Loading required package: expsmooth
## Loading required package: lmtest
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: tseries
library(fpp2)
## ── Attaching packages ────────────────────────────────────────────── fpp2 2.5 ──
## ✔ ggplot2 3.4.3
## 
## 
## Attaching package: 'fpp2'
## The following objects are masked from 'package:fpp':
## 
##     ausair, ausbeer, austa, austourists, debitcards, departures,
##     elecequip, euretail, guinearice, oil, sunspotarea, usmelec
library(TTR)

Plot the Time-Series Data

This is a plot of Electrical Energy Consumption in the U.S.

attributes(Electricity_Data_US_ts)
## $tsp
## [1] 2001.000 2022.167   12.000
## 
## $class
## [1] "ts"
plot(Electricity_Data_US_ts)

Acf(Electricity_Data_US_ts)

Take Mean of all available history

The Mean Forecast shows the Simple Average Forecast which computes the average of past observations to produce the forecast. All the data is given an equal weight from which the mean is produced. It can be seen visually from the plot that is not a good forecast model for the data.

#take Mean of all available history
mean_forecast <- meanf(Electricity_Data_US_ts,36)
plot(mean_forecast)

Take Naive Forecast of all available history

The Naive Forecast uses the previous value to produce a forecast. This is known as a “no-change” forecast since it uses the previous data point as the forecast model. However, we can see that the confidence intervals are larger for the Naive forecast than for the Mean Forecast (both 80% and 95% confidence intervals). It can be seen visually from the plot that is not a good forecast model for the data.

# Naive
naive_forecast <- naive(Electricity_Data_US_ts,36)
plot(naive_forecast)

Random Walk

The Random Walk Forecast shows a forecast from the previous value that is flat, which is similar to the Naive Forecast. However, we can see that the confidence intervals are larger for the Random Walk forecast than for the Mean Forecast (both 80% and 95% confidence intervals). It can be seen visually from the plot that is not a good forecast model for the data.

# Random Walk
rwf_forecast <- rwf(Electricity_Data_US_ts,36, drift=FALSE)
plot(rwf_forecast)

Random Walk with Drift

The Random Walk with Drift Forecast closely resembles the Random Walk Forecast. The Confidence Interval is the same as the Random Walk Forecast. It can be seen visually from the plot that is not a good forecast model for the data.

# Random Walk with Drift
rwf_forecast_drift <- rwf(Electricity_Data_US_ts,36, drift=TRUE)
plot(rwf_forecast_drift)

Seasonal Naive

The Seasonal Naive Model produces a forecast that continues the same seasonal pattern of the actual data. Because the time-series data shows is highly seasonal in a regular pattern, the Seasonal Naive Forecast is a good forecast model.

snaive_forecast <- snaive(Electricity_Data_US_ts,36)
plot(snaive_forecast)

Moving Averages

Moving Average with Order = 3

The Moving Average (Order = 3) appears to be a good model for the data. The data is seasonal and this plot shows the seasonal nature of the time-series data. Since there are sudden shifts in the data, the smaller order of the moving average is a better fit for the data than larger orders.

MA3_forecast <- ma(Electricity_Data_US_ts,order=3)
plot(MA3_forecast)

Moving Average with Order = 6

The Moving Average (Order = 6) smooths out the data more than the Moving Average plot for Order=3. The plot still shows the seasonal nature of the data, even though the graph is slightly smoother. The larger the order of the Moving Average Forecast, the greater the smoothing effect in the plot.

MA6_forecast <- ma(Electricity_Data_US_ts,order=6)
plot(MA6_forecast)

Moving Average with Order = 12

The Moving Average (Order = 12) shows more smoothing of the data than the previous plot. The larger the order of the Moving Average, the greater the smoothing effect that will be shown in the plot. The regularity of the seasonal nature of the data is not as obvious in this plot.

MA12_forecast <- ma(Electricity_Data_US_ts,order=12)
plot(MA12_forecast)

Moving Average with Order = 18

The Moving Average (Order = 18) appears to show the seasonal nature of the data better than the Moving Average (Order = 12).

MA18_forecast <- ma(Electricity_Data_US_ts,order=18)
plot(MA18_forecast)

Moving Average with Order = 36

The Moving Average (Order = 36) shows the most smoothing out of all the Moving Average plots so far. This plot demonstrates that the larger the order, the greater the smoothing effect.

MA36_forecast <- ma(Electricity_Data_US_ts,order=36)
plot(MA36_forecast)

Moving Average with Order = 72

The Moving Average (72) shows the most smoothing out of all the Moving Average plots. This plot demonstrates that the larger the order of the Moving Average, the greater the smoothing effect of the graph.

MA72_forecast <- ma(Electricity_Data_US_ts,order=72)
plot(MA72_forecast)

Plot the Time-Series and Different Model Forecasts in a Single Chart

The Moving Average Forecasts are shown in the graph with the time-series data. Generally, the larger the order of the Moving Average, the greater the smoothing effect. The Moving Averages with a smaller order show a greater amplitude and a better fit of the data than the Moving Averages with a larger order.

The Mean Forecast shows a forecast that is calculated from the mean of all the data points . All the data points are given an equal weight when calculating the Mean Forecast. That is why this looks like a straight line forecast.

The Naive and Random Walk Forecasts are calculated from the value of the last data point.

plot(mean_forecast)
lines(naive_forecast$mean,col="red")
lines(rwf_forecast$mean,col="green")
lines(rwf_forecast_drift$mean,col="violet")
lines(snaive_forecast$mean,col="blue")
lines(MA6_forecast,col="darkred")
lines(MA12_forecast,col="cyan")
lines(MA18_forecast,col="purple")
lines(MA36_forecast,col="orange")
lines(MA72_forecast,col="darkgreen")

# what other attributes are there?
attributes(naive_forecast)
## $names
##  [1] "method"    "model"     "lambda"    "x"         "fitted"    "residuals"
##  [7] "series"    "mean"      "level"     "lower"     "upper"    
## 
## $class
## [1] "forecast"
attributes(snaive_forecast)
## $names
##  [1] "method"    "model"     "lambda"    "x"         "fitted"    "residuals"
##  [7] "series"    "mean"      "level"     "lower"     "upper"    
## 
## $class
## [1] "forecast"

Decomposition

ets_forecast <- ets(Electricity_Data_US_ts)
plot(ets_forecast)

attributes(ets_forecast)
## $names
##  [1] "loglik"     "aic"        "bic"        "aicc"       "mse"       
##  [6] "amse"       "fit"        "residuals"  "fitted"     "states"    
## [11] "par"        "m"          "method"     "series"     "components"
## [16] "call"       "initstate"  "sigma2"     "x"         
## 
## $class
## [1] "ets"
ets_forecast$mse
## [1] 74587535

Holt-Winters Forecast Model

Holt-Winters Forecast #1 - With No Filters

HW_forecast <- HoltWinters(Electricity_Data_US_ts)
HoltWinters(Electricity_Data_US_ts)
## Holt-Winters exponential smoothing with trend and additive seasonal component.
## 
## Call:
## HoltWinters(x = Electricity_Data_US_ts)
## 
## Smoothing parameters:
##  alpha: 0.3902766
##  beta : 0.01085984
##  gamma: 0.331543
## 
## Coefficients:
##            [,1]
## a   354874.6578
## b      223.4292
## s1  -50275.8367
## s2  -20339.1656
## s3   21266.8941
## s4   63687.1146
## s5   57589.3653
## s6     331.7626
## s7  -28307.7701
## s8  -35421.2163
## s9   -3891.0857
## s10  14376.2408
## s11 -27411.4034
## s12 -28011.9778
attributes(HW_forecast)
## $names
## [1] "fitted"       "x"            "alpha"        "beta"         "gamma"       
## [6] "coefficients" "seasonal"     "SSE"          "call"        
## 
## $class
## [1] "HoltWinters"
summary(HW_forecast)
##              Length Class  Mode     
## fitted       972    mts    numeric  
## x            255    ts     numeric  
## alpha          1    -none- numeric  
## beta           1    -none- numeric  
## gamma          1    -none- numeric  
## coefficients  14    -none- numeric  
## seasonal       1    -none- character
## SSE            1    -none- numeric  
## call           2    -none- call

The Holt-Winters plot shows a good fit with the actual data in terms of the trend and the seasonality. This shows that the Holt-Winters plot is a good forecast model for this time-series data. Based on the comparison between the actual and forecast data (red and black lines), the error is minimal for this forecast model.

plot(HW_forecast)

Holt-Winters Forecast #2 - Beta is TRUE and Gamma is TRUE

This filter has no exponential smoothing and a seasonal model is fitted to the data.

SSE_Simple <- HoltWinters(Electricity_Data_US_ts, beta=TRUE, gamma=TRUE)
attributes(SSE_Simple)
## $names
## [1] "fitted"       "x"            "alpha"        "beta"         "gamma"       
## [6] "coefficients" "seasonal"     "SSE"          "call"        
## 
## $class
## [1] "HoltWinters"
SSE_Simple$SSE
## [1] 132840676529

The Holt-Winters plot shows a good fit with the actual data in terms of the seasonality. However, this forecast model shows more errors between the actual and forecast data (red and black lines) than the previous Holt-Winters plot without the filters. The previous Holt-Winters plot was a better fit of the data.

plot(SSE_Simple)

Holt-Winters Forecast #3 - Beta is FALSE and Gamma is TRUE

This filter has exponential smoothing and a seasonal model is fitted to the data.

SSE_Simple_2 <- HoltWinters(Electricity_Data_US_ts, beta=FALSE, gamma=TRUE)
attributes(SSE_Simple_2)
## $names
## [1] "fitted"       "x"            "alpha"        "beta"         "gamma"       
## [6] "coefficients" "seasonal"     "SSE"          "call"        
## 
## $class
## [1] "HoltWinters"
SSE_Simple_2$SSE
## [1] 25755978737

The Holt-Winters plot shows a better fit with the actual data than the previous model. When the exponential smoothing is applied to the data, the forecast model provides a better fit. However, the Holt-Winters plot without filters still shows a better fit than either of the filtered Holt-Winters plot.

plot(SSE_Simple_2)

Holt-Winters Forecast #4 - Beta is TRUE and Gamma is FALSE.

This filter has no exponential smoothing and a non-seasonal model is fitted to the data.

SSE_Simple_3 <- HoltWinters(Electricity_Data_US_ts, beta=TRUE, gamma=FALSE)
attributes(SSE_Simple_3)
## $names
## [1] "fitted"       "x"            "alpha"        "beta"         "gamma"       
## [6] "coefficients" "seasonal"     "SSE"          "call"        
## 
## $class
## [1] "HoltWinters"
SSE_Simple_3$SSE
## [1] 395769094686

The Holt-Winters plot shows a worse fit with the actual data than any of the previous Holt-Winters model. The main difference is that the Holt-Winters filter assumes a non-seasonal model. Since this time-series data shows a highly seasonal pattern, it is no surprise that there are significant differences between the actual data and the forecast data (black and red lines) from this model.

plot(SSE_Simple_3)

Pick an Accuracy Measure, Compare Your Models, and State the Best Model Based on the Accuracy Comparison

Summary

The best model is Holt-Winters (no filters).

The MAPE (Mean Absolute Scaled Error) is selected as the accuracy measure selected to compare the different forecast models. The Holt-Winters Forecast (no filters) is the best model based on this accuracy comparison. It has a MAPE of 2.331722 which is the lowest out of all the forecasts.

The Holt-Winters Forecast also has the lowest RMSE (Root Mean Squared Error) and MAE (Mean Absolute Error) when compared with the other forecasts. Given the seasonal nature of the time-series data, it is not surprising that Holt-Winters is the best model in this accuracy comparison.

The second-best model is the Holt-Winters Forecast with Filters (beta=FALSE, gamma=TRUE). It has the second-lowest MAPE (Mean Absolute Scaled Error), RMSE (Root Mean Squared Error), and MAE (Mean Absolute Error) when compared with the other forecasts.

The third-best model is the Seasonal Naive Forecast. It has the third-lowest MAPE (Mean Absolute Scaled Error), RMSE (Root Mean Squared Error), and MAE (Mean Absolute Error) when compared with the other forecasts.

Moving Average Accuracies

Moving Average (Order = 3)

accuracy(MA3_forecast, Electricity_Data_US_ts, h=12)
##                 ME     RMSE      MAE        MPE     MAPE       ACF1 Theil's U
## Test set -62.89592 13318.94 11363.18 -0.3054734 3.388174 -0.2220107 0.4227245

Moving Average (Order = 6)

accuracy(MA6_forecast, Electricity_Data_US_ts, h=12)
##                 ME     RMSE      MAE        MPE     MAPE      ACF1 Theil's U
## Test set -102.0174 27587.94 23659.78 -0.7481517 6.960664 0.3994861 0.8629468

Moving Average (Order = 12)

accuracy(MA12_forecast, Electricity_Data_US_ts, h=12)
##               ME     RMSE      MAE        MPE     MAPE     ACF1 Theil's U
## Test set 455.537 33368.27 27510.49 -0.7941443 8.033122 0.545415   1.01404

Moving Average (Order = 18)

accuracy(MA18_forecast, Electricity_Data_US_ts, h=12)
##                 ME     RMSE      MAE       MPE    MAPE      ACF1 Theil's U
## Test set -603.8902 35530.28 29054.47 -1.163674 8.51316 0.5882638   1.06695

Moving Average (Order = 36)

accuracy(MA36_forecast, Electricity_Data_US_ts, h=12)
##                ME     RMSE      MAE        MPE     MAPE      ACF1 Theil's U
## Test set 628.4826 33683.51 27783.54 -0.7525422 8.090469 0.5479755 0.9695606

Moving Average (Order = 72)

accuracy(MA72_forecast, Electricity_Data_US_ts, h=12)
##              ME     RMSE      MAE        MPE     MAPE      ACF1 Theil's U
## Test set 632.21 33554.61 27597.78 -0.7422381 8.021641 0.5502105 0.8789916

Forecast Accuracies

Mean Forecast

accuracy(mean_forecast)
##                        ME     RMSE      MAE       MPE     MAPE     MASE
## Training set 1.758458e-11 34217.74 28000.58 -0.985932 8.221914 2.719387
##                   ACF1
## Training set 0.5730148

Naive Forecast

accuracy(naive_forecast)
##                     ME     RMSE     MAE        MPE     MAPE     MASE     ACF1
## Training set -25.75197 31674.55 27003.3 -0.4384288 8.025504 2.622533 0.202647

Random-Walk Forecast

accuracy(rwf_forecast)
##                     ME     RMSE     MAE        MPE     MAPE     MASE     ACF1
## Training set -25.75197 31674.55 27003.3 -0.4384288 8.025504 2.622533 0.202647

Random-Walk Forecast with Drift

accuracy(rwf_forecast_drift)
##                         ME     RMSE      MAE        MPE    MAPE     MASE
## Training set -7.333694e-12 31674.54 27002.29 -0.4307069 8.02486 2.622434
##                  ACF1
## Training set 0.202647

Seasonal Naive Forecast

accuracy(snaive_forecast)
##                   ME     RMSE      MAE       MPE     MAPE MASE      ACF1
## Training set 1737.77 12937.34 10296.65 0.4518986 3.019937    1 0.4693802

Holt-Winters Accuracies

Holt-Winters Forecast

forecast_ets_hw <- forecast(HW_forecast, h=12)
accuracy(forecast_ets_hw)
##                     ME     RMSE    MAE        MPE     MAPE      MASE       ACF1
## Training set -741.1512 9778.629 7875.2 -0.3154225 2.331722 0.7648313 0.09550884

Holt-Winters Forecast (SSE_Simple 1) (beta=TRUE, gamma=TRUE)

forecast_ets_sse_simple_hw <- forecast(SSE_Simple, h=12)
accuracy(forecast_ets_sse_simple_hw)
##                    ME     RMSE      MAE        MPE     MAPE     MASE      ACF1
## Training set 72.13175 23380.96 18023.74 -0.1438919 5.327583 1.750447 0.2449821

Holt-Winters Forecast (SSE_Simple 2) (beta=FALSE, gamma=TRUE)

forecast_ets_sse_simple_2_hw <- forecast(SSE_Simple_2, h=12)
accuracy(forecast_ets_sse_simple_2_hw)
##                    ME     RMSE      MAE         MPE     MAPE      MASE
## Training set 250.8219 10295.23 8244.493 0.005103653 2.432738 0.8006967
##                   ACF1
## Training set -0.166648

Holt-Winters Forecast (SSE_Simple 3) (beta=TRUE, gamma=FALSE)

forecast_ets_sse_simple_3_hw <- forecast(SSE_Simple_3, h=12)
accuracy(forecast_ets_sse_simple_3_hw)
##                    ME     RMSE      MAE       MPE     MAPE     MASE       ACF1
## Training set 170.2119 39551.29 33713.86 0.2144175 10.14931 3.274255 -0.1017875

Setup Training Set and Test Set

training_set = window(Electricity_Data_US_ts, start = c(2001,1), end=c(2018,2))
test_set = window(Electricity_Data_US_ts, start = c(2018,3), end=c(2022,3))

forecast_ets_hw <- forecast(HoltWinters(training_set), h=48)
accuracy(forecast_ets_hw, test_set, h=48)
##                      ME      RMSE      MAE        MPE     MAPE      MASE
## Training set  -867.6619  9899.848 8014.994 -0.3440075 2.383951 0.7819938
## Test set     -5093.5139 11819.418 9256.124 -1.6183754 2.790274 0.9030864
##                   ACF1 Theil's U
## Training set 0.1142370        NA
## Test set     0.3821497 0.3920909
accuracy(naive(training_set,h=48), test_set)
##                      ME     RMSE      MAE        MPE      MAPE     MASE
## Training set  -124.8732 31518.74 26944.08 -0.4700109  8.047214 2.628836
## Test set     35927.2083 50520.87 38677.83  9.5585885 10.508863 3.773656
##                   ACF1 Theil's U
## Training set 0.1935375        NA
## Test set     0.5736193  1.475309
accuracy(rwf(training_set,h=48), test_set)
##                      ME     RMSE      MAE        MPE      MAPE     MASE
## Training set  -124.8732 31518.74 26944.08 -0.4700109  8.047214 2.628836
## Test set     35927.2083 50520.87 38677.83  9.5585885 10.508863 3.773656
##                   ACF1 Theil's U
## Training set 0.1935375        NA
## Test set     0.5736193  1.475309
accuracy(snaive(training_set,h=48), test_set)
##                    ME     RMSE      MAE       MPE     MAPE     MASE      ACF1
## Training set 1764.402 12982.25 10249.43 0.4685888 3.000859 1.000000 0.4905627
## Test set     2909.625 13147.75 10838.79 0.7180272 3.138524 1.057502 0.2120350
##              Theil's U
## Training set        NA
## Test set     0.3989327

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.